07 Velocity RPE no Naive¶
This notebook analyzes merges looms into adata.
Initialization¶
In [1]:
# First import the necessary packages
import scvelo as scv
import scanpy as sc
import cellrank as cr
import numpy as np
import pandas as pd
import anndata as ad
import os
from datetime import datetime as dt
# Set the verbosity of scVelo
scv.settings.verbosity = 2
scv.settings.set_figure_params('scvelo', facecolor='white', dpi=200, frameon=False)
cr.settings.verbosity = 2
sc.settings.verbosity = 2
sc.logging.print_header()
scanpy==1.9.6 anndata==0.10.3 umap==0.5.5 numpy==1.24.4 scipy==1.11.3 pandas==2.1.1 scikit-learn==1.1.3 statsmodels==0.14.0 igraph==0.11.3 louvain==0.8.1 pynndescent==0.5.11
In [2]:
# Name Variables and Settings
prefix = "07_" # Filename prefix
suffix = "_velocity_RPEnoNaive" # Filename suffix
savedata = True # Save data at the end
# Set working directory
os.chdir("/home/dalbao/2023-012-Runx3mutD8scRNA/AlbaoRunx3Manuscript/single_cell/07_velocity")
# Determine work location
print("The work location for this notebook is: " + os.getcwd() + "\n")
# Get a timestamp for the start of the run
timestamp = dt.now()
print("This notebook was last run on " + timestamp.strftime("%y-%m-%d %H:%M") + " UTC\n")
# Determine the filename for the expected output h5ad
fn = prefix + timestamp.strftime("%y-%m-%d-%H-%M")
print("The filename for the AnnData output of this notebook will be:")
print(fn + suffix + ".h5ad")
print("which will be saved in the WORKDIR/h5ad/ folder.\n")
The work location for this notebook is: /home/dalbao/2023-012-Runx3mutD8scRNA/AlbaoRunx3Manuscript/single_cell/07_velocity This notebook was last run on 25-12-09 12:32 UTC The filename for the AnnData output of this notebook will be: 07_25-12-09-12-32_velocity_RPEnoNaive.h5ad which will be saved in the WORKDIR/h5ad/ folder.
In [3]:
# Determine input file
input = "07_25-12-09-09-20_loomed_RPEnoNaive.h5ad"
print("The input h5ad file is: h5ad/" + input)
# List items in outs folder
adata = ad.read_h5ad("../../h5ad/" + input)
# Inspect AnnData
print(adata)
sc.pl.umap(adata, color = "leiden_r1")
adata.obs.group.value_counts()
The input h5ad file is: h5ad/07_25-12-09-09-20_loomed_RPEnoNaive.h5ad
AnnData object with n_obs × n_vars = 15770 × 18505
obs: 'experiment', 'group', 'timepoint', 'infection', 'cell_id', 'n_genes_by_counts', 'log1p_n_genes_by_counts', 'total_counts', 'log1p_total_counts', 'total_counts_mt', 'log1p_total_counts_mt', 'pct_counts_mt', 'total_counts_ribo', 'log1p_total_counts_ribo', 'pct_counts_ribo', 'doublet_score', 'predicted_doublet', 'scDblFinder_score', 'scDblFinder_class', 'gem', 'QC', 'n_genes', 'S_score', 'G2M_score', 'phase', 'leiden_r1.5', 'leiden_r1', 'leiden_r0.9', 'leiden_r0.8', 'leiden_r0.7', 'leiden_r0.6', 'leiden_r0.5', 'leiden_r0.4', 'leiden_r0.3', 'leiden_r0.2', 'leiden_r0.1', 'leiden_r1_as_resolved', 'sample_batch', 'batch', 'initial_size_unspliced', 'initial_size_spliced', 'initial_size'
var: 'mt', 'ribo', 'n_cells_by_counts-gem1', 'mean_counts-gem1', 'log1p_mean_counts-gem1', 'pct_dropout_by_counts-gem1', 'total_counts-gem1', 'log1p_total_counts-gem1', 'n_cells_by_counts-gem2', 'mean_counts-gem2', 'log1p_mean_counts-gem2', 'pct_dropout_by_counts-gem2', 'total_counts-gem2', 'log1p_total_counts-gem2', 'n_cells', 'n_counts', 'highly_variable', 'means', 'dispersions', 'dispersions_norm', 'mean', 'std', 'Accession', 'Chromosome', 'End', 'Start', 'Strand'
uns: 'group_colors', 'hvg', 'leiden', 'leiden_r0.1_colors', 'leiden_r0.2_colors', 'leiden_r0.3_colors', 'leiden_r0.4_colors', 'leiden_r0.5_colors', 'leiden_r0.6_colors', 'leiden_r0.7_colors', 'leiden_r0.8_colors', 'leiden_r0.9_colors', 'leiden_r1.5_colors', 'leiden_r1_colors', 'leiden_r1_sizes', 'log1p', 'neighbors', 'paga', 'pca', 'phase_colors', 'umap', 'wilcoxon'
obsm: 'X_pca', 'X_umap'
varm: 'PCs'
layers: 'ambiguous', 'counts', 'matrix', 'soupX_counts', 'spliced', 'unspliced'
obsp: 'connectivities', 'distances'
/opt/conda/lib/python3.11/site-packages/scanpy/plotting/_tools/scatterplots.py:394: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored cax = scatter(
Out[3]:
group WT 4830 Null 4565 Base 4197 d8 1209 d5 969 Name: count, dtype: int64
Calculate Velocities¶
In [4]:
# Check proportion of spliced and unspliced reads
scv.pl.proportions(adata, groupby='leiden_r1')
scv.pl.proportions(adata, groupby='group')
In [5]:
# Pre-processing
scv.pp.filter_and_normalize(adata, layers_normalize = ['spliced', 'unspliced'], log = False)
scv.pp.moments(adata)
Normalized count data: X, spliced, unspliced.
computing neighbors
finished (0:00:12)
computing moments based on connectivities
finished (0:00:09)
In [6]:
# Compute velocity
scv.tl.velocity(adata, mode='stochastic')
scv.tl.velocity_graph(adata, n_jobs=36, backend="threading")
computing velocities
finished (0:00:12)
computing velocity graph (using 36/48 cores)
finished (0:00:15)
In [7]:
scv.pl.velocity_embedding(adata, basis='umap', frameon=False)
computing velocity embedding
finished (0:00:02)
In [8]:
scv.pl.velocity_embedding_grid(adata, basis='umap', color='leiden_r1', title='', scale=1)
In [9]:
scv.pl.velocity_embedding_stream( adata,
basis='umap',
color=['leiden_r1'],
title='',
dpi=600,
legend_loc='none',
save='velocity_stream' + suffix + '.svg')
saving figure to file ./figures/scvelo_velocity_stream_velocity_RPEnoNaive.svg